library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)
require(RCurl)
library(lubridate)
url_file_prov<-"https://raw.githubusercontent.com/montera34/escovid19data/master/data/output/covid19-provincias-spain_consolidated.csv"
url_file_poblacion <- "https://raw.githubusercontent.com/montera34/escovid19data/master/data/original/provincias-poblacion.csv"
provincias <- read.csv( url_file_prov,encoding = "UTF-8")
provincias_poblacion <- read.csv( url_file_poblacion,encoding = "UTF-8")
provincias <- provincias %>%
filter (date >= "2020-12-02")
provincias$date <- as.Date(provincias$date,format= "%Y-%m-%d")
provincias <- provincias %>%
select (date,province,ccaa,hospitalized,intensive_care,daily_deaths) %>%
merge(select(provincias_poblacion,provincia,poblacion,ine_code),
by.x = "province", by.y = "provincia" ) %>%
mutate(hospitalized_tot = ifelse(ccaa == "AndalucĆa" |
ccaa == "Madrid, Comunidad de" |
ccaa == "Cantabria" |
ccaa == "Murcia" |
ccaa == "Cantabria",hospitalized ,
hospitalized+intensive_care)) %>%
mutate (hospitalized_nor =hospitalized_tot *100000/ poblacion ) %>%
mutate (intensive_care_nor = intensive_care *100000/ poblacion) %>%
mutate (daily_deaths_nor =daily_deaths *100000/ poblacion ) %>%
mutate (year = as.integer(format(date, format="%Y")) -2020) %>%
mutate (week = (as.integer(format(date, format="%U")) +1)+(52*year)) %>%
arrange (week)
theme_a <- function(base_size = 12,
base_family = "sans"
)
{
tema <-
theme_bw(base_size=base_size) +
theme(legend.position="top") +
theme(legend.text = element_text(size=base_size+1,family = base_family)) +
theme(plot.title=element_text(size=base_size+2,
vjust=1.25,
family=base_family,
hjust = 0.5
)) +
theme(plot.subtitle=element_text(size=base_size, family = base_family)) +
theme(text = element_text(size=base_size+1,family = base_family)) +
theme(axis.text.x=element_text(size=base_size,family = base_family)) +
theme(axis.text.y=element_text(size=base_size, family = base_family)) +
theme(axis.title.x=element_text(size=base_size, vjust=0, family = base_family)) +
theme(axis.title.y=element_text(size=base_size, vjust=1.25, family = base_family)) +
theme(plot.caption=element_text(size=base_size-2, family = base_family)) +
theme(strip.text = element_text(size=base_size+1, family = base_family)) +
theme(strip.text.x = element_text(size=base_size, family = base_family)) +
theme(strip.text.y = element_text(size=base_size,, family = base_family))
return (tema)
}
tendencia <- function (provincias,indicator,text_indicator,max_size,skip,left_margin){
library(ggalt)
weeks <- unique(provincias$week)
first_week <- min(weeks,na.rm = TRUE)
last_week <- max(weeks,na.rm = TRUE)
last_week_text <- last_week %% 53
name_file <- paste0("images/diferencias_",indicator,"_tendencia.png")
chart_title <- paste("Diferencia de",text_indicator, "entre las semanas",last_week_text-1, "y",last_week_text )
diff <- mutate (provincias,indicator=provincias[[indicator]]) %>%
filter (week == last_week | week == last_week-1 ) %>%
filter (!is.na (indicator) ) %>%
select (week,province,indicator,ccaa) %>%
group_by(week,province,ccaa) %>%
summarise ( indicator_week = mean (indicator), .groups = 'drop') %>%
mutate (indicator_last_week = ifelse(week == last_week, indicator_week, NA)) %>%
mutate (indicator_pre_last_week = ifelse(week == last_week-1, indicator_week, NA))%>%
group_by(province,ccaa) %>%
summarise ( indicator_last_week = sum (indicator_last_week,na.rm = TRUE),
indicator_pre_last_week = sum (indicator_pre_last_week,na.rm = TRUE) ,
indicator_diff = indicator_last_week - indicator_pre_last_week,
.groups = 'drop') %>%
filter (indicator_last_week > 0 & indicator_pre_last_week > 0,) %>%
mutate(Color = ifelse(indicator_diff >0, "Incremento", "Descenso"))
grafica <-
ggplot(data = diff) +
geom_segment(aes(x=indicator_pre_last_week,
xend=indicator_last_week,
y= reorder(province,indicator_last_week),
yend=province,
color=Color),
arrow = arrow(length=unit(0.20,"cm"), ends="last", type = "closed"),size=1)+
geom_line (aes(x=indicator_last_week,
y= as.numeric(reorder(province,indicator_last_week))),
size=4, alpha=0.5, color = "gray") +
labs(x = "Variación", y = "Provincias",
caption = "By @congosto\nFuente: @escovid19data. https://github.com/montera34/escovid19data",
title = chart_title,
subtitle="Normalizado a 100.000 habitantes.Los valores semanales se calculan como media\nEn rojo las provincias que aumentan las hospitalizaciones, en azul las que disminuyen")+
scale_colour_manual('', values = c('Descenso'='steelblue4', 'Incremento'='red4')) +
scale_x_continuous(name=text_indicator,position = "top",
limit = c(-left_margin,max_size),
breaks=seq(0, max_size, by = skip),
#expand = c(0,0),
sec.axis = dup_axis() ) +
theme_a()+
theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.x=element_blank(),
axis.ticks.y=element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(colour = "gray")) +
ggsave(name_file,width = 200, height = 200,unit="mm")
return (grafica)
}
GrƔfica sin nombres de provincia para destacar la tendencia.
res <- tendencia (provincias,"hospitalized_nor", "Hospitalizados",50,25,1)
plot(res)
distribucion <- function (provincias,indicator,text_indicator) {
weeks <- unique(provincias$week)
first_week <- min(weeks,na.rm = TRUE)
last_week <- max(weeks,na.rm = TRUE)
name_file <- paste0("images/diferencias_",indicator,"_distribucion.png")
chart_title <- paste("Distribución de",text_indicator, "el último mes" )
df <- mutate (provincias,indicator=provincias[[indicator]]) %>%
filter (!is.na (indicator) ) %>%
rename (fecha = date) %>%
filter (week > last_week -4) %>%
select (fecha,week,province,indicator) %>%
group_by(week,province)%>%
summarise ( indicator_week = mean (indicator),
fecha = lubridate::ymd( "2021-01-01" ) + lubridate::weeks( week - 53 ),
.groups = 'drop')
grafica <- ggplot(data = df,
aes( x=fecha, y=indicator_week, group = week)) +
geom_boxplot(color="red4") +
stat_summary(geom="text", fun=quantile,
aes(label=sprintf("%1.1f", ..y..)),
position=position_nudge(x=3.50), size=3.5) +
labs( title = chart_title,
caption = "By @congosto\nFuente: @escovid19data. https://github.com/montera34/escovid19data")+
guides(color = FALSE)+
theme_a()+
theme(axis.title.y=element_blank(),
axis.title.x=element_blank(),
panel.grid.major.y = element_blank(),
legend.position="top",legend.text = element_text(size=11))
ggsave(name_file,width = 200, height = 125,unit="mm")
return (grafica)
}
res <- distribucion (provincias,"hospitalized_nor", "Hospitalizados")
plot(res)
### Distribución de UCI
res <- distribucion (provincias,"intensive_care_nor", "UCI")
plot(res)
### GrÔfica de variación (incremento-descenso)
dumbbell_A <- function (provincias,indicator,text_indicator,max_size,skip,left_margin){
library(ggalt)
weeks <- unique(provincias$week)
first_week <- min(weeks,na.rm = TRUE)
last_week <- max(weeks,na.rm = TRUE)
last_week_text <- last_week %% 53
name_file <- paste0("images/diferencias_",indicator,"_incremento_dumbbell.png")
chart_title <- paste("Diferencia de",text_indicator, "entre las semanas",last_week_text-1, "y",last_week_text )
diff <- mutate (provincias,indicator=provincias[[indicator]]) %>%
filter (!is.na (indicator) ) %>%
filter (week == last_week | week == last_week-1 ) %>%
select (week,province,indicator) %>%
group_by(week,province) %>%
summarise ( indicator_week = mean (indicator), .groups = 'drop') %>%
mutate(indicator_last_week = ifelse(week == last_week, indicator_week, NA)) %>%
mutate(indicator_pre_last_week = ifelse(week == last_week-1, indicator_week, NA)) %>%
group_by(province) %>%
summarise ( indicator_last_week = sum (indicator_last_week,na.rm = TRUE),
indicator_pre_last_week = sum (indicator_pre_last_week,na.rm = TRUE) ,
indicator_diff = indicator_last_week - indicator_pre_last_week,na.rm = TRUE,
.groups = 'drop') %>%
filter (indicator_last_week > 0 & indicator_pre_last_week > 0) %>%
mutate(Color = ifelse(indicator_diff >0, "Incremento", "Descenso"),
ajuste_text = ifelse(indicator_diff >=0, -0.2,1.2),
ajuste_perc = ifelse(indicator_diff > 0, 1.2,-0.2))
p <-
ggplot( data = diff) +
geom_segment(aes(x=indicator_pre_last_week,
xend=indicator_last_week,
y= reorder(province,indicator_last_week),
yend=province,
color=Color),
arrow = arrow(length=unit(0.20,"cm"), ends="last", type = "closed"),size=1)+
geom_line (aes(x=indicator_last_week,
y= as.numeric(reorder(province,indicator_last_week))),
size=4, alpha=0.5, color = "gray") +
geom_text(aes(x=indicator_last_week,
y= reorder(province,indicator_last_week), color=Color, label = province,
hjust= ajuste_text),
size=4, vjust=0, show.legend = FALSE ) +
geom_text(aes(x=indicator_pre_last_week,
y= reorder(province,indicator_last_week), color=Color,label = round(indicator_diff,1),
hjust= ajuste_perc),
size=4, vjust=0, show.legend = FALSE ) +
labs(x = "Variación", y = "Provincia",
caption = "By @congosto\nFuente: @escovid19data. https://github.com/montera34/escovid19data",
title = chart_title,
subtitle="Normalizado a 100.000 habitantes.Los valores semanales se calculan como media\nEn rojo las provincias que aumentan las hospitalizaciones, en azul las que disminuyen.\nLos números corresponden a la variación entre ambas semanas")+
scale_colour_manual('', values = c('Descenso'='steelblue4', 'Incremento'='red4')) +
scale_x_continuous(name=text_indicator,position = "top",
limit = c(-left_margin,max_size),
breaks=seq(0, max_size, by = skip),
sec.axis = dup_axis() ) +
theme_a()+
theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.ticks.x=element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(colour = "gray")) +
ggsave(name_file,width = 200, height = 325,unit="mm")
return (p)
}
dumbbell_B <- function (provincias,indicator,text_indicator,max_size,skip,left_margin){
library(ggalt)
weeks <- unique(provincias$week)
first_week <- min(weeks,na.rm = TRUE)
last_week <- max(weeks,na.rm = TRUE)
last_week_text <- last_week %% 53
name_file <- paste0("images/diferencias_",indicator,"_valor_dumbbell.png")
chart_title <- paste("Diferencia de",text_indicator, "entre las semanas", last_week_text-1,"y",last_week_text )
diff <- mutate (provincias,indicator=provincias[[indicator]]) %>%
filter (!is.na (indicator) ) %>%
select (week,province,indicator) %>%
group_by(week,province) %>%
summarise ( indicator_week = mean (indicator), .groups = 'drop') %>%
mutate(indicator_last_week = ifelse(week == last_week, indicator_week, NA)) %>%
mutate(indicator_pre_last_week = ifelse(week == last_week-1, indicator_week, NA)) %>%
group_by(province) %>%
summarise ( indicator_last_week = sum (indicator_last_week,na.rm = TRUE),
indicator_pre_last_week = sum (indicator_pre_last_week,na.rm = TRUE) ,
indicator_diff = indicator_last_week - indicator_pre_last_week,na.rm = TRUE,
.groups = 'drop') %>%
filter (indicator_last_week > 0 & indicator_pre_last_week > 0) %>%
mutate(Color = ifelse(indicator_diff >0, "Incremento", "Descenso"),
text_last_week = ifelse(indicator_diff >0, paste(round(indicator_last_week,1),province),
paste(province,round(indicator_last_week,1))),
ajuste_last_week = ifelse(indicator_diff >0, -0.2,1.2),
ajuste_pre_last_week = ifelse(indicator_diff >0 ,1.2, -0.2))
p <-
ggplot(data = diff) +
geom_segment(aes(x=indicator_pre_last_week,
xend=indicator_last_week,
y= reorder(province,indicator_last_week),
yend=province,
color=Color),
arrow = arrow(length=unit(0.20,"cm"), ends="last", type = "closed"),size=1)+
geom_line (aes(x=indicator_last_week,
y= as.numeric(reorder(province,indicator_last_week))),
size=4, alpha=0.7, color = "gray") +
geom_text(aes(x=indicator_last_week,
y= reorder(province,indicator_last_week), color=Color,
label = text_last_week,
hjust= ajuste_last_week),
size=4, vjust=0, show.legend = FALSE ) +
geom_text(aes(x=indicator_pre_last_week,
y= reorder(province,indicator_last_week), color=Color,
label = round(indicator_pre_last_week,1),
hjust= ajuste_pre_last_week),
size=4, vjust=0, show.legend = FALSE ) +
labs(x = "Variación", y = "Provincia",
caption = "By @congosto\nFuente: @escovid19data. https://github.com/montera34/escovid19data",
title = chart_title,
subtitle="Normalizado a 100.000 habitantes.Los valores semanales se calculan como media\nEn rojo las provincias que aumentan las hospitalizaciones, en azul las que disminuyen.\nLos nĆŗmeros corresponden a la tasa de cada semana")+
scale_colour_manual('', values = c('Descenso'='steelblue4', 'Incremento'='red4')) +
scale_x_continuous(name=text_indicator,position = "top",
limit = c(-left_margin,max_size),
breaks=seq(0, max_size, by = skip),
sec.axis = dup_axis() ) +
theme_a()+
theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.ticks.x=element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.major.x = element_line(colour = "gray")) +
ggsave(name_file,width = 200, height = 325, units = "mm")
return (p)
}
res <- dumbbell_A (provincias,"hospitalized_nor", "Hospitalizados",50,25,5)
plot(res)
res <- dumbbell_B (provincias,"hospitalized_nor", "Hospitalizados",50,25,5)
plot(res)
res <- dumbbell_A (provincias,"intensive_care_nor", "UCI",15,5,1)
plot(res)
res <- dumbbell_B (provincias,"intensive_care_nor", "UCI",15,5,1.2)
plot(res)
heat_map_resumen <- function (provincias,indicator,text_indicator){
weeks <- unique(provincias$week)
first_week <- min(weeks,na.rm = TRUE)
last_week <- max(weeks,na.rm = TRUE)
name_file <- paste0("images/tercera_ola_",indicator,"_resumen.png")
chart_title <- paste("Evolución de", text_indicator)
df <- mutate (provincias,indicator=provincias[[indicator]]) %>%
filter (!is.na (indicator) ) %>%
select (week,province,indicator) %>%
group_by(week,province) %>%
summarise ( indicator_week = mean (indicator), .groups = 'drop') %>%
mutate(indicator_last_week = ifelse(week == last_week, indicator_week, 0))
p <-
ggplot(data = df,
aes(x = week, y = reorder(province,(indicator_last_week)), fill=indicator_week)) +
geom_tile()+
geom_text(aes(label = round(indicator_week,1)), color="white", size=2.5,hjust=0.5, vjust=0.5 ) +
scale_fill_gradient(low = "steelblue", high = "red4")+
labs(y = "Provincia",
caption = "By @congosto\nFuente: @escovid19data. https://github.com/montera34/escovid19data",
title = chart_title,
subtitle="Tercera Ola (desde el 2 de diciembre). Normalizado a 100.000 habitantes")+
scale_x_continuous(name="Semanas", breaks= weeks ,position = "top",
sec.axis = dup_axis()) +
theme_a()+
theme(legend.title=element_blank(),
axis.ticks.y=element_blank(),
axis.ticks.x=element_blank(),
axis.title.y=element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.text.x=element_text(size=10),
axis.text.y=element_text(size=10),
legend.position="right") +
ggsave(name_file)
return (p)
}
res <- heat_map_resumen (provincias,"hospitalized_nor", "Hospitalizados")
plot(res)
res <- heat_map_resumen (provincias,"intensive_care_nor", "UCI")
plot(res)
res <- heat_map_resumen (provincias,"daily_deaths_nor", "Fallecimientos")
plot(res)
GrÔficas inspiradas en las realizadas por @BIOCOMSC1 sobre el riesgo del COVID19. Dan mucha información con pocos elementos (poca tinta), permitiendo ver la evolución, el estado actual y las zonas de riesgo. Risk Diagrams
Cuando los datos tienen una componente temporal normalmente usamos un line chart. En estas grÔficas se representa la relación de dos variables a través del tiempo de una manera distinta. Es un diagrama de puntos unidos según su evolución en el tiempo.
No sĆ© el nombre que reciben estos grĆ”ficos pero William Phillips los utilizó 1958 para representar la evolución del desempleo y la inflación a travĆ©s del tiempo, que ha pasado a la historia como āla curva Phillipsā.
la curva de Phillips
Las grÔficas han sido mejoradas con la idea de Maarten Lambrechts de agrupar los datos semanalmente y señalar en el path el número de la semana.
La fuente de datos es el repositorio #escovid19data
Las grÔficas muestran la relación entre UCI y hospitalizados calculados como media semanal.
La semana de inicio es la 32 (3 Ago~9 Ago)
El código de color usado es:
connected_scatterplot_hos_UCI <- function (provincias,autonomia) {
autonomia_text <- str_replace_all(autonomia, " ", "_")
name_file <- paste0("images/tercera_ola_",autonomia_text,"_UCI_vs_hospitalizados.png")
chart_title <- paste("Evolución de los Hospitalizados vs. UCI (media semanal) en",autonomia)
df <- filter (provincias,ccaa == autonomia) %>%
filter (!is.na(hospitalized_nor)) %>%
filter (!is.na(intensive_care_nor)) %>%
mutate(tipo_fecha = ifelse(week == min(week), "primera",
ifelse(week == max(week), "ultima","otras"))) %>%
group_by(week,province) %>%
summarise ( hospitalized_nor_week = mean(hospitalized_nor),
daily_death_nor_week = mean(daily_deaths_nor),
intensive_care_nor_week = mean(intensive_care_nor),
tipo_fecha = tipo_fecha,
.groups = 'drop') %>%
arrange (week,province)
p <-
ggplot(data = df,
aes(x=hospitalized_nor_week, y=intensive_care_nor_week, color=tipo_fecha ))+
geom_path (color="cornflowerblue",size=0.5,alpha=0.5)+
geom_label(aes(label = ifelse(week > 53 ,week %% 53,week)),
label.padding = unit(0.15, "lines"),
size=3,hjust=1.1, vjust=0.5 ) +
labs(x = "Hospitalizados (media semanal)", y = "UCI (media semanal)",
caption = "By @congosto\nFuente: @escovid19data. https://github.com/montera34/escovid19data",
title = chart_title,
subtitle="Tercera Ola (desde el 2 de diciembre). Normalizado a 100.000 habitantes") +
scale_colour_manual('', values = c('primera' = 'forestgreen',
'ultima' = 'red',
'otras' = 'steelblue4')) +
guides(color = FALSE) +
theme_a()+
facet_wrap(~ province) +
ggsave(name_file )
return(p)
}
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"AndalucĆa")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Aragón")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Asturias, Principado de")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Balears, Illes")
plot(res)
res <- connected_scatterplot_hos_UCI (provincias,"Canarias")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Cantabria")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Castilla y León")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Castilla - La Mancha")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"CataluƱa")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Comunitat Valenciana")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Extremadura")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Galicia")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Madrid, Comunidad de")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Murcia, Región de")
plot(res)
Segunda Ola (desde el 3 de agosto)
#res <- connected_scatterplot_hos_UCI (provincias,"PaĆs Vasco")
#plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Rioja, La")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Ceuta")
plot(res)
Segunda Ola (desde el 3 de agosto)
res <- connected_scatterplot_hos_UCI (provincias,"Melilla")
plot(res)